project descrition:In this file, we are using Piet Johnson data on amphibian macroparasites to determine whether we can detect parasite induced host mortality from observed parasites distribution. Particular, we are focuing on parasites RION
Loading in the macroparasites field data and formatting it for the LRT function
data <- read.csv("~/Dropbox/pihm_project/data/archival/necropsy20092014_all2.csv", header=TRUE)
para_data_dt <- as.data.table(data)
# Step 1:finding the total number of host and parasites for each site-year-RION combo, save it to an vector
sum_RION <- para_data_dt[,list(sum_para=sum(RION),
tot_host=length(speciescode)),
by=c("year","sitename", "speciesname")]
We just look at Pseudacris regilla; subset the full data which only include Pseudacris
sum_regilla <- para_data_dt[which(speciesname=="Pseudacris regilla")]
# distribute the resulting data table according to different year, site and speciesname
# make a table for the sum of parasites and host for each year, site and species
sum_regilla_rion <- sum_regilla[,list(sum_para=sum(RION),
tot_host=length(speciescode)),
by=c("year","sitename", "speciesname")]
# saving p-value from LRT analysis, NA is not converging
NA_or_p <- array(NA, dim=c(390, 1))
for (i in 1:nrow(sum_regilla_rion)){
year_temp <- sum_regilla_rion$year[i]
site_temp <- sum_regilla_rion$sitename[i]
df_data <- sum_regilla[which(year==year_temp & sitename==site_temp)]
if(sum_regilla_rion$sum_para[i]==0){
NA_or_p[i,] <- c(NA)
}else{
# if not, do analysis
result <- tryCatch({
mu=mean(df_data$RION)
variance = var(df_data$RION)
result <- LRT(a=2, b=-0.15, k=mu^2/(variance-mu), data=df_data$RION, mu=mu)
}, error = function(err){
# change initial condition/ more iteration
# fitdistribution function
NA_or_p[i,] <-c(NA)
})
NA_or_p[i,] <- c(result[10])
} # End if
} # End for loop
Now let’s explore the results from the previous chrunk, looking at the results in NA_or_p.
# section 2: extract data with parasites and hosts which still have NA as results
tot_NA <- sum(is.na(NA_or_p))
P_smaller_0.05<- NA_or_p <= 0.05
tot_P_smaller <- sum(P_smaller_0.05, na.rm = TRUE)
P_greater_0.05 <- NA_or_p > 0.05
tot_P_larger <- sum(P_greater_0.05, na.rm = TRUE)
print(list("Total Na"=tot_NA,
"Total probability less than or equal to 0.05"=tot_P_smaller,
"Total probability larger than 0.05"=tot_P_larger))
## $`Total Na`
## [1] 268
##
## $`Total probability less than or equal to 0.05`
## [1] 2
##
## $`Total probability larger than 0.05`
## [1] 120
May due to no parasites in the dataset or there are numbers of parasites but the results are not converging. Explore the reasons that leads to the none-convergent; whether it due to statistical errors or the biological factors Step 1: extract and figure out which site is NA do not have zero parasites
# make a new column for p value or NA
sum_regilla_rion$p_value = NA_or_p
try_convergence <- sum_regilla_rion[which(sum_para!=0 & is.na(p_value)==TRUE)]
tot_distris = dim(try_convergence)[1]
print(dim(try_convergence))
## [1] 102 6
Step 2: fit a negative bionomial distrbution and find the p-value to all 102
mu_and_k_pvalue <- array(NA, dim=c(102, 3))
for (i in 1:nrow(try_convergence)) {
year_temp <- try_convergence$year[i]
site_temp <- try_convergence$sitename[i]
data_temp <- sum_regilla[which(year==year_temp & sitename==site_temp)]
hist(data_temp$RION, freq=FALSE, ylab = "Density/Probability", xlab = "number of parasites", main = paste(year_temp, site_temp))
fit_distris <- tryCatch({
mu = mean(data_temp$RION)
variance = var(data_temp$RION)
fit_distris <- fitdistr(data_temp$RION, "negative binomial", list(mu=mu, size=mu^2/(variance-mu)))
fit_distris$estimate
}, error = function(err){
return(c(NA,NA,NA))
})
mu_and_k_pvalue[i,] <- c(fit_distris[1], fit_distris[2], NA)
lines(dnbinom(0:max(data_temp$RION), size=fit_distris[2], mu=fit_distris[1]),col="red")
# running chi-square test to calculate p-value
table_RION <- table(data_temp$RION)
if (all(is.na(mu_and_k_pvalue[i,]))==FALSE &
length(levels(as.factor(table_RION))) > 1){
#para_vals = 0:100000
prob <- dnbinom((as.numeric(names(table_RION))), size=fit_distris[2], mu=fit_distris[1])* length(data_temp$RION)
# table the prob along with the table_rion
#bin_obs <- cut(para_vals, breaks = c(-1, as.numeric(names(table_RION)), 100000))
p.value <- chisq.test(table_RION, prob)[3]
mu_and_k_pvalue[i,] <- c(fit_distris[1], fit_distris[2], p.value$p.value)
} else {
mu_and_k_pvalue[i,] <- c(fit_distris[1], fit_distris[2], NA)
}
}
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): Chi-squared approximation may
## be incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): Chi-squared approximation may
## be incorrect
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): Chi-squared approximation may
## be incorrect
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): Chi-squared approximation may
## be incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): Chi-squared approximation may
## be incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): Chi-squared approximation may
## be incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): Chi-squared approximation may
## be incorrect
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): Chi-squared approximation may
## be incorrect
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): Chi-squared approximation may
## be incorrect
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(table_RION, prob): Chi-squared approximation may be
## incorrect
mu_k_pvalue_sitename_year <- do.call(rbind, Map(data.frame, year=try_convergence$year , sitename=try_convergence$sitename, mu=mu_and_k_pvalue[,1], k=mu_and_k_pvalue[,2], pvalue=mu_and_k_pvalue[,3]))
** compile didnt dit and did fit ** make a single table with the sitename, year, P, H, P-value from LRT, mu, k, p-value from chi-square test
# compiling results from the question 1
vals <- do.call(rbind, Map(data.frame, year=sum_regilla_rion$year, sitename=sum_regilla_rion$sitename, sum_para=sum_regilla_rion$sum_para,
tot_host=sum_regilla_rion$tot_host, NA_or_pvalue=NA_or_p))
#setkey function
# sitename_year <- cbind(mu_k_pvalue_sitename_year[,1], as.data.frame(mu_k_pvalue_sitename_year[,2]))
# key.temp <- setkey(as.table(mu_k_pvalue_sitename_year), sitename_year)
compiled_data <- merge(vals, mu_k_pvalue_sitename_year, by.x=c("year","sitename"), by.y=c("year","sitename"), all.x = TRUE)
**Using distribution from data; applying LRT_known_a_and_b
# LRT_known_a_and_b(mu=7.5 , k=8.5754925, data = sum_regilla[which(sitename=='Boob' & year==2009)]$RION, a= 1.67, b= -0.05)
# AIC difference = 0.006
# LRT_known_a_and_b(mu=2.6 , k=0.76, data = sum_regilla[which(sitename=='GDPND014' & year==2009)]$RION, a= 1.67, b= -0.05)
# AIC difference = 0.02
# LRT_known_a_and_b(mu=18.06 , k=5.71, data = sum_regilla[which(sitename=='Murky Bullfrog' & year==2009)]$RION, a= 1.67, b= -0.05)
# AIC difference = 0.14
# mu = mean(sum_regilla[which(sitename =='TGIF' & year == 2009)]$RION)
# variance = var(sum_regilla[which(sitename =='TGIF' & year == 2009)]$RION)
# k = mu^2/(variance-mu)
# LRT_known_a_and_b(mu = mu , k = k, data = sum_regilla[which(sitename=='TGIF' & year==2009)]$RION, a= 1.67, b= -0.05)
# AIC difference = 1.2, no conclusion but the p_value was 0.002214401 (which suggests that pihm is happening but AIC difference does not support the conclusion)
# mu = mean(sum_regilla[which(sitename =='Frog' & year == 2013)]$RION)
# variance = var(sum_regilla[which(sitename =='Frog' & year == 2013)]$RION)
# k = mu^2/(variance-mu)
# LRT_known_a_and_b(mu = mu , k = k, data = sum_regilla[which(sitename=='Frog' & year==2013)]$RION, a= 1.67, b= -0.05)
# iteraction limite maxit reached, p_value was 0.045, mu=37.6, k=1.77
# LRT_known_a_and_b(mu = 11.153846 , k = 3.7681007, data = sum_regilla[which(sitename=='TGIF' & year==2014)]$RION, a= 1.67, b= -0.05)
# AIC difference = 0.4, no conclusion made
** Looping through each year and site and run AIC test to get difference and integrate the results into one table with known a= 1.67 and b= -0.05
table_AIC <- array(NA, dim=c(390, 1))
for (i in 1:nrow(compiled_data)) {
year_i <- compiled_data$year[i]
site_i <- compiled_data$sitename[i]
data_i <- sum_regilla[which(year==year_i & sitename==site_i)]
if(is.na(compiled_data[i,5])==TRUE & is.na(compiled_data[i,6])==TRUE){
table_AIC[i,] <- c(NA)
} else if(is.na(compiled_data$NA_or_pvalue[i])==FALSE){
mu_temp = mean(data_i$RION)
variance = var(data_i$RION)
k_temp = mu_temp^2/(variance-mu_temp)
dt_AIC <- LRT_known_a_and_b(mu = mu_temp,k = k_temp, a = 1.67, b = -0.05, data = data_i$RION)
table_AIC[i,] <- c(dt_AIC[1,1]-dt_AIC[2,1])
} else{
mu_temp = compiled_data$mu[i]
k_temp = compiled_data$k[i]
dt_AIC <- LRT_known_a_and_b(mu = mu_temp,k = k_temp, a = 1.67, b = -0.05, data = data_i$RION)
table_AIC[i,] <- c(dt_AIC[1,1]-dt_AIC[2,1])
}
}
compiled_withAIC <- do.call(rbind, Map(data.frame, year=compiled_data$year, sitename=compiled_data$sitename, sum_para=compiled_data$sum_para,
tot_host=compiled_data$tot_host, NA_or_pvalue=compiled_data$NA_or_pvalue, mu = compiled_data$mu, k = compiled_data$k, pvalue= compiled_data$pvalue, dt_AIC=table_AIC))
LRT_k <- array(NA, dim=c(390, 1))
for (i in 1:nrow(compiled_data)){
year_temp <- compiled_data$year[i]
site_temp <- compiled_data$sitename[i]
df_data <- sum_regilla[which(year==year_temp & sitename==site_temp)]
if(compiled_data$sum_para[i]==0){
LRT_k[i,] <- c(NA)
}
else{
val_temp <- tryCatch({
mu=mean(df_data$RION)
val_temp <- LRT_known_k(a = 2, b = -0.15, k=1, data = df_data$RION, mu = mu)
}, error = function(err){
LRT_k[i,] <- c(NA)
})
LRT_k[i,] <- c(val_temp[10])
}
}
compiled_withAIC_with.k <- do.call(rbind, Map(data.frame, year=compiled_data$year, sitename=compiled_data$sitename, sum_para=compiled_data$sum_para,
tot_host=compiled_data$tot_host, NA_or_pvalue=compiled_data$NA_or_pvalue, mu = compiled_data$mu, k = compiled_data$k, pvalue= compiled_data$pvalue, dt_AIC=table_AIC, pvalue_with.k=LRT_k))